* Session setup: empty the session, set the matrix size, turn off output paging,
* close any log that is still open, then move to the project root and open this file's log.
* The globals master_dir and log_dir are not defined in this file.
clear all
set matsize 10000
set more off
capture log close
cd "${master_dir}"
log using "${log_dir}/4-Appendix.log", replace
***************************************************************************************************
* 
* Program: 4-Appendix.do
* Purpose: Appendix Figures and Tables -- Following same outline as paper
* Sections:
*     1. Set globals and define programs used throughout
*     2. Appendix A (Figures and Tables)
*     3. Appendix B (Mechanisms)
*     4. Appendix C (Descriptive Stats by State, CD, and County)
* Files Used:
*     1. voterwaittimes.dta
*     2. cces_comparison.dta
*     3. county_voterwaittimes.dta
*     4. Polling_Places_Voters_AA_Nums.dta
* Files Created:
*     1. 4-Appendix.log
*     2. Table A1: app_A_t1_sumstats.tex
*     3. Figure A3: app_A_f3a_openarrival.png, app_A_f3b_closearrival.png
*     4. Figure A4: app_A_f4_cces.png, app_A_f4a_ccesstate.png, app_A_f4b_ccescd.png
*     5. Figure A5: app_A_f5_otherraces.png
*     6. Figure A6: app_A_f6_poverty.png
*     7. Figure A7: app_A_f7_strictfilter.png
*     8. Table A2: app_A_t2_ols_fullcoef.tex
*     9. Table A3: app_A_t3_lpm_fullcoef.tex
*     10. Table A5: app_A_t5_strictfilter.tex
*     11. Figure B1: app_B_f1_bunching.png
*     12. Figure B2: app_B_f2_partisan.png
*     13. Figure B3: app_B_f3_predictors.png
*     14. Figure B4: app_B_f4_congestion.png (code is currently commented out in Section 3, so not produced)
*     15. Table B1: app_B_t1_bunching.tex
*     16. Table B2: app_B_t2_buildingtype.tex
*     17. Table B3: app_B_t3_congestion_overall.tex
*     18. Table B4: app_B_t4_congestion_interact.tex
*     19. Table C1: app_t8_state_raw
*     20. Table C2: app_t9_cd_raw
*     21. Table C3: app_t10_bigcounty_raw
* Notes:
*     Appendix Figure A1 produced in ArcGIS
*     Appendix Figures A2 & A8 produced in 5-PlaceboDayAnalysis.do
*     Appendix Table A4 produced in 3-PrimaryAnalysis.do
*
***************************************************************************************************

***************************************************************************************************
*  1. Set globals and define programs used throughout
***************************************************************************************************

* Program to find medians to label in graph
* textbox_stats: summary figures for labelling graphs.
* Reads two globals: measure (the variable to summarize) and group_decile (a decile variable).
* Writes six globals, each formatted with %6.0fc:
*   median, mean, sd       - statistics of the measure over the data in memory
*   median_d1, median_d10  - median of the measure within decile 1 and within decile 10
*   percent                - difference between the two decile medians, as a percent of decile 1
* The program is declared rclass but returns nothing in r(); results are passed via globals only.
capture program drop textbox_stats
program define textbox_stats, rclass
	* Whole-sample statistics
	summarize $measure, detail
	global median: di %6.0fc `r(p50)'
	global mean: di %6.0fc `r(mean)'
	global sd: di %6.0fc `r(sd)'
	* Median within the lowest decile
	summarize $measure if $group_decile==1, detail
	local p50_low = `r(p50)'
	* Median within the highest decile
	summarize $measure if $group_decile==10, detail
	local p50_high = `r(p50)'
	* Percent gap between the two medians, relative to the lowest decile
	local gap_pct = (`p50_high'-`p50_low')*100/`p50_low'
	global percent: di %6.0fc `gap_pct'
	global median_d1: di %6.0fc `p50_low'
	global median_d10: di %6.0fc `p50_high'
end

* Set globals
* Variable and decile read by textbox_stats
global measure "waittime"
global group_decile "black_decile"
* Regressor lists reused by the regression tables below
global other_race "race_asianpi race_hispanic race_othernonwhite"
global controls "pov_under_poverty_line pop pop_sqmi"
global extracontrols "i.hour_of_arrival android"

***************************************************************************************************
*  2. Appendix A (Figures and Tables)
***************************************************************************************************

**********
* Appendix Table A1: Summary Statistics
**********

* Load the voter-level data and keep observations passing the likelyvoter_v1 filter
* and the three other filters (enteredpoll, consistentpinger, reasonablevalues)
use "${data_dir}/voterwaittimes.dta", clear
keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1

* Construct "waittime" variables that have single filters applied, just for this table
gen waittime1 = waittime
gen waittime2 = lowerbound
gen waittime3 = upperbound
gen waittime4 = wait_over_30min
label var waittime1 "Primary Wait Time Measure (Midpoint)"
label var waittime2 "Lower Bound Wait Time Measure"
label var waittime3 "Upper Bound Wait Time Measure"
label var waittime4 "Wait Time Is Over 30min"
global waittime_versions "waittime1 waittime2 waittime3 waittime4"
global race "race_white race_black race_asianpi race_hispanic race_othernonwhite"
global pollcharacteristics "pov_under_poverty_line pop pop_sqmi"

* Tabulate count, mean, sd, min, p10, p50, p90, max for every variable (statistics as columns)
eststo sumstat: estpost tabstat $waittime_versions $race $pollcharacteristics, col(stats) ///
  stats(count mean sd min p10 p50 p90 max)
* Write the LaTeX table; refcat() inserts bold group headings above the first variable of each group.
* The output path is quoted so that a result_dir containing spaces does not break the command.
esttab sumstat using "$result_dir/app_A_t1_sumstats.tex", replace nonote noobs label ///
  cells((count(fmt(%16.0gc)) mean(fmt(%16.2fc)) sd(fmt(%16.2fc)) min(fmt(%16.2fc)) ///
  p10(fmt(%16.2fc)) p50(fmt(%16.2fc)) p90(fmt(%16.2fc)) max(fmt(%16.2fc)))) ///
  refcat(waittime1 "\textbf{Wait Time Measures}" race_white ///
  "\textbf{Race Fractions in Polling Area}" pov_under_poverty_line "\textbf{Other Demographics}" ///
  , nolabel)
  
**********
* Appendix Figure A3: Volume by Hour of Arrival (by Open vs. Close time variation)
**********

* Both panels plot the data currently in memory (the filtered sample loaded for Table A1).
* The saving() paths are quoted so that a result_dir containing spaces does not break the command.

* Appendix Figure A3a: Volume of Voters by Hour of Arrival - Early Open (6am) vs. Late Open (7am) States
* Overlaid discrete histograms of hour_of_arrival: filled bars for poll_open_early == 0
* (legend "7am Open") and outlined bars for poll_open_early == 1 (legend "6am Open").
tw (histogram hour_of_arrival if poll_open_early == 0, discrete fcolor(vermillion) ///
  fintensity(50) lcolor(vermillion) legend(order(1 "7am Open" 2 "6am Open"))) ///
  (histogram hour_of_arrival if poll_open_early == 1, ///
  discrete fcolor(none) lcolor(black)), ///
  xtitle("Hour of Election Day") $white xlab(1(1)23, nogrid) ytitle("Density") ///
  text(.102 5.9 "6am", place(n)) ///
  text(.113 6.9 "7am", place(n)) ylab(0(.01).12,nogrid format(%16.0gc) angle(horizontal)) ///
  title("(a) Number of Voters (6am vs. 7am Open)", size(huge)) ///
  saving("$result_dir/app_A_f3a_openarrival.gph", replace)
graph export "$result_dir/app_A_f3a_openarrival.png",  replace 

* Appendix Figure A3b: Volume by Hour of Arrival: Early (7pm) vs. Late (8pm) Close States
* Same layout: filled bars for poll_close_early == 1 (legend "7pm Close") and outlined bars
* for poll_close_early == 0 (legend "8pm Close").
tw (histogram hour_of_arrival if poll_close_early == 1, discrete fcolor(vermillion) ///
  fintensity(50) lcolor(vermillion) legend(order(1 "7pm Close" 2 "8pm Close"))) ///
  (histogram hour_of_arrival if poll_close_early == 0, ///
  discrete fcolor(none) lcolor(black)), xtitle("Hour of Election Day") $white ///
  xlab(1(1)23, nogrid) ytitle("Density") text(.0355 19.1 "7pm", place(n)) ///
  text(.0045 20.1 "8pm", place(n)) ylab(0(.01).12,nogrid format(%16.0gc) angle(horizontal)) ///
  title("(b) Number of Voters (7pm vs. 8pm Close)", size(huge)) ///
  saving("$result_dir/app_A_f3b_closearrival.gph", replace)
graph export "$result_dir/app_A_f3b_closearrival.png",  replace

**********
* Appendix Figure A4: Validation with CCES
**********
* Load the CCES comparison data
use "${data_dir}/cces_comparison.dta", clear

* Range for the 45-degree reference line: 0 up to the maximum of the state-level CCES variable.
* NOTE(review): the locals xmin and xmax are reused unchanged for the congressional-district
* panel below; confirm that the state-level range is the intended one there.
sum state_cces_waittime_avg_ebayes, meanonly
local xmin = 0
local xmax = r(max)

* Panel (a): state-level scatter (one observation per state via tag_state), the y=x line in red,
* and a linear fit in gray. All file paths are quoted so that a result_dir containing spaces works.
twoway (scatter state_waittime_avg_ebayes state_cces_waittime_avg_ebayes if tag_state == 1, ///
        msize(vlarge) msymbol(oh)) ///
       (function y=x, range(`xmin' `xmax') n(2) sort lcolor(red)) ///
       (lfit state_waittime_avg_ebayes state_cces_waittime_avg_ebayes if tag_state == 1 , ///
        lcolor(gs10)), $white ylab(0(5)40, nogrid labsize(large)) xlab(0(5)30, labsize(large)) ///
        xtitle("CCES State Average Wait Time", size(vlarge)) ///
        ytitle("Smartphone State Average Wait Time", size(vlarge)) ///
        title("(a) State Estimates", size(huge)) ///
        legend(off) saving("$result_dir/app_A_f4a_ccesstate.gph", replace)
graph export "$result_dir/app_A_f4a_ccesstate.png", replace
corr state_waittime_avg_ebayes state_cces_waittime_avg_ebayes if tag_state == 1

* Panel (b): the same three layers using the congressional-district variables (no tag restriction)
twoway (scatter cd_waittime_avg_ebayes cd_cces_waittime_avg_ebayes, ///
        msize(vlarge) msymbol(oh)) ///
       (function y=x, range(`xmin' `xmax') n(2) sort lcolor(red)) ///
       (lfit cd_waittime_avg_ebayes cd_cces_waittime_avg_ebayes, ///
        lcolor(gs10)), $white ylab(0(5)40, nogrid labsize(large)) xlab(0(5)30, labsize(large)) ///
        xtitle("CCES Congressional District Average Wait Time", size(vlarge)) ///
        ytitle("Smartphone Congressional District Average Wait Time", size(vlarge)) ///
        title("(b) Congressional District Estimates", size(huge)) ///
        legend(off) saving("$result_dir/app_A_f4b_ccescd.gph", replace)
graph export "$result_dir/app_A_f4b_ccescd.png", replace
corr cd_waittime_avg_ebayes cd_cces_waittime_avg_ebayes
 
* Combine subfigures into one
graph combine "$result_dir/app_A_f4a_ccesstate" "$result_dir/app_A_f4b_ccescd", ///
  graphregion(fcolor(white) lcolor(white)) imargin(small) iscale(* .6) ///
  cols(2) rows(1) saving("$result_dir/app_A_f4_cces.gph", replace)
graph use "$result_dir/app_A_f4_cces.gph"
graph export "$result_dir/app_A_f4_cces.png", replace

**********
* Appendix Figure A5: Figure 3 for Other Race Categories
**********

* Reload the voter-level data with the likelyvoter_v1 sample filters
cd "${master_dir}"
use "${data_dir}/voterwaittimes.dta", clear
keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1

* Construct deciles of each race-fraction variable: <group>_decile from race_<group>
* NOTE(review): race_asian is not the name used elsewhere in this file (race_asianpi);
* presumably it resolves through variable abbreviation or is a separate variable - confirm.
foreach grp in hispanic asian othernonwhite allnonwhite {
	xtile `grp'_decile = race_`grp', nquantiles(10)
}

* Hispanic Deciles
* Log per-decile summaries and the number of distinct polling places in deciles 1 and 10,
* then overlay kernel densities of waittime for decile 1 (solid) and decile 10 (dashed).
* The saving() path is quoted so that a result_dir containing spaces does not break the command.
global group_decile hispanic_decile
bysort $group_decile: sum race_hispanic waittime
unique PollingPlace_ID if $group_decile == 1
unique PollingPlace_ID if $group_decile == 10
twoway (kdensity waittime if $group_decile == 1, bwidth(1) color(vermillion%100)) ///
       (kdensity waittime if $group_decile == 10, bwidth(1) color(black) lpattern(dash) ///
       ytitle("Kernel Density", size(medium)) ylab(,nogrid format(%16.0gc) angle(horizontal)) ///
       xtitle("Wait Time in Minutes", size(medium)) legend(off) xlab(0(10)120, format(%16.0gc)) ///
       $white text(.050 36.5 "Lowest Fraction Hispanic (1st Decile)", color(vermillion)) ///
       text(.017 51.5 "Highest Fraction Hispanic (10th Decile)") ///
       title("(a) Hispanic") ///
       saving("$result_dir/app_A_f5a_hispanic.gph", replace))
graph export "$result_dir/app_A_f5a_hispanic.png",  replace 

* Asian Deciles
* Log per-decile summaries and the number of distinct polling places in deciles 1 and 10,
* then overlay kernel densities of waittime for decile 1 (solid) and decile 10 (dashed).
* The saving() path is quoted so that a result_dir containing spaces does not break the command.
global group_decile asian_decile
bysort $group_decile: sum race_asian waittime
unique PollingPlace_ID if $group_decile == 1
unique PollingPlace_ID if $group_decile == 10
twoway (kdensity waittime if $group_decile == 1, bwidth(1) color("vermillion%100")) ///
       (kdensity waittime if $group_decile == 10, bwidth(1) color(black) lpattern(dash) ///
       ytitle("Kernel Density", size(medium)) ylab(,nogrid format(%16.0gc) angle(horizontal)) ///
       xtitle("Wait Time in Minutes", size(medium)) legend(off) xlab(0(10)120, format(%16.0gc)) ///
       $white text(.050 35.5 "Lowest Fraction Asian (1st Decile)", color(vermillion)) ///
       text(.017 50.5 "Highest Fraction Asian (10th Decile)") ///
       title("(b) Asian") ///
       saving("$result_dir/app_A_f5b_asian.gph", replace))
graph export "$result_dir/app_A_f5b_asian.png",  replace 

* Other Non-White Deciles
* Log per-decile summaries and the number of distinct polling places in deciles 1 and 10,
* then overlay kernel densities of waittime for decile 1 (solid) and decile 10 (dashed).
* The saving() path is quoted so that a result_dir containing spaces does not break the command.
global group_decile othernonwhite_decile
bysort $group_decile: sum race_othernonwhite waittime
unique PollingPlace_ID if $group_decile == 1
unique PollingPlace_ID if $group_decile == 10
twoway (kdensity waittime if $group_decile == 1, bwidth(1) color("vermillion%100")) ///
       (kdensity waittime if $group_decile == 10, bwidth(1) color(black) lpattern(dash) ///
       ytitle("Kernel Density", size(medium)) ylab(,nogrid format(%16.0gc) angle(horizontal)) ///
       xtitle("Wait Time in Minutes", size(medium)) legend(off) xlab(0(10)120, format(%16.0gc)) ///
       $white text(.050 43.5 "Lowest Fraction Other Non-White (1st Decile)", color(vermillion)) ///
       title("(c) Other Non-White") ///
       text(.017 58.5 "Highest Fraction Other Non-White (10th Decile)") ///
       saving("$result_dir/app_A_f5c_othernonwhite.gph", replace))
graph export "$result_dir/app_A_f5c_othernonwhite.png",  replace 

* All Non-White Deciles
* Log per-decile summaries and the number of distinct polling places in deciles 1 and 10,
* then overlay kernel densities of waittime for decile 1 (solid) and decile 10 (dashed).
* The saving() path is quoted so that a result_dir containing spaces does not break the command.
global group_decile allnonwhite_decile
bysort $group_decile: sum race_allnonwhite waittime
unique PollingPlace_ID if $group_decile == 1
unique PollingPlace_ID if $group_decile == 10
twoway (kdensity waittime if $group_decile == 1, bwidth(1) color("vermillion%100")) ///
       (kdensity waittime if $group_decile == 10, bwidth(1) color(black) lpattern(dash) ///
       ytitle("Kernel Density", size(medium)) ylab(,nogrid format(%16.0gc) angle(horizontal)) ///
       xtitle("Wait Time in Minutes", size(medium)) legend(off) xlab(0(10)120, format(%16.0gc)) ///
       $white text(.050 42.5 "Lowest Fraction All Non-White (1st Decile)", color(vermillion)) ///
       text(.017 57.5 "Highest Fraction All Non-White (10th Decile)") ///
       title("(d) All Non-White") ///
       saving("$result_dir/app_A_f5d_allnonwhite.gph", replace))
graph export "$result_dir/app_A_f5d_allnonwhite.png",  replace 

* Combine the four saved panels (a)-(d) into a 2x2 figure and export it.
* All paths are quoted so that a result_dir containing spaces does not break the commands.
graph combine "$result_dir/app_A_f5a_hispanic" "$result_dir/app_A_f5b_asian" ///
  "$result_dir/app_A_f5c_othernonwhite" "$result_dir/app_A_f5d_allnonwhite", ///
  graphregion(fcolor(white) lcolor(white)) imargin(20) iscale(* .6) ///
  cols(2) rows(2) saving("$result_dir/app_A_f5_otherraces.gph", replace)
graph use "$result_dir/app_A_f5_otherraces.gph"
graph export "$result_dir/app_A_f5_otherraces.png", replace

**********
* Appendix Figure A6: Figure 3 for Poverty Measure
**********

* Deciles of the poverty variable, built on the data already in memory
* (the likelyvoter_v1 sample loaded for Figure A5).
xtile poverty_decile = pov_under_poverty_line, nquantiles(10)
global group_decile poverty_decile
* Log per-decile summaries and the number of distinct polling places in deciles 1 and 10
bysort $group_decile: sum pov_under_poverty_line waittime
unique PollingPlace_ID if $group_decile == 1
unique PollingPlace_ID if $group_decile == 10
* Kernel densities of waittime for decile 1 (solid) and decile 10 (dashed).
* The saving() path is quoted so that a result_dir containing spaces does not break the command.
twoway (kdensity waittime if $group_decile == 1, bwidth(1) color("vermillion%100")) ///
       (kdensity waittime if $group_decile == 10, bwidth(1) color(black) lpattern(dash) ///
       ytitle("Kernel Density", size(medium)) ylab(,nogrid format(%16.0gc) angle(horizontal)) ///
       xtitle("Wait Time in Minutes", size(medium)) legend(off) xlab(0(10)120, format(%16.0gc)) ///
       $white text(.050 31.5 "Lowest Poverty (1st Decile)", color(vermillion)) ///
       text(.017 46.5 "Highest Poverty (10th Decile)") ///
       saving("$result_dir/app_A_f6_poverty.gph", replace))
graph export "$result_dir/app_A_f6_poverty.png",  replace 

**********
* Appendix Figure A7: Figure 3 with Stricter Likely Voter Filter (likelyvoter_v2)
**********

* Reload the voter-level data, this time keeping the likelyvoter_v2 sample
use "${data_dir}/voterwaittimes.dta", clear
keep if likelyvoter_v2 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1

* Deciles of race_black within this sample, with per-decile summaries for the log
xtile black_decile = race_black, nquantiles(10)
bysort black_decile: sum race_black waittime
unique PollingPlace_ID if black_decile == 1
unique PollingPlace_ID if black_decile == 10
* Kernel densities of waittime for decile 1 (solid) and decile 10 (dashed).
* The saving() path is quoted so that a result_dir containing spaces does not break the command.
twoway (kdensity waittime if black_decile == 1, bwidth(1) color("vermillion%100")) ///
       (kdensity waittime if black_decile == 10, bwidth(1) color(black) lpattern(dash) ///
       ytitle("Kernel Density", size(medium)) ylab(,nogrid format(%16.0gc) angle(horizontal)) ///
       xtitle("Wait Time in Minutes", size(medium)) legend(off) xlab(0(10)120, format(%16.0gc)) ///
       $white text(.050 35.5 "Lowest Fraction Black (1st Decile)", color(vermillion)) ///
       text(.017 50.5 "Highest Fraction Black (10th Decile)") ///
       saving("$result_dir/app_A_f7_strictfilter.gph", replace))
graph export "$result_dir/app_A_f7_strictfilter.png",  replace 

* Log-only summaries for the two extreme deciles (the unique counts repeat the ones above)
sum race_black waittime wait_over_30min if black_decile == 1
sum race_black waittime wait_over_30min if black_decile == 10
unique PollingPlace_ID if black_decile == 1
unique PollingPlace_ID if black_decile == 10

**********
* Appendix Table A2: Repeat Table 2, Panel A but show full set of coefficients (& add extra columns)
**********
* Reload the voter-level data with the likelyvoter_v1 sample filters
use "${data_dir}/voterwaittimes.dta", clear
keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1

* Panel A -- Ordinary Least Squares
* Six specifications of waittime on race_black, with standard errors clustered on PollingPlace_ID:
*   M1 race_black only; M2 adds the other race shares; M3 adds the polling-area controls;
*   M4 absorbs statefips; M5 absorbs statecountyfips; M6 also adds the extra controls.
* After each model, the mean of the dependent variable over the estimation sample is attached
* to the stored estimates as the scalar DepVarMean.
eststo M1: reg waittime race_black, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M2: reg waittime race_black $other_race, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M3: reg waittime race_black $other_race $controls, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M4: areg waittime race_black $other_race $controls, abs(statefips) cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M5: areg waittime race_black $other_race $controls, abs(statecountyfips) cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M6: areg waittime race_black $other_race $controls $extracontrols, abs(statecountyfips) ///
  cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)

* Write the six-column LaTeX table with hand-built header and footer rows.
* The output path is quoted so that a result_dir containing spaces does not break the command.
#d ;
esttab M1 M2 M3 M4 M5 M6 using "$result_dir/app_A_t2_ols_fullcoef.tex", replace 
  keep(race_black race_asianpi race_hispanic race_othernonwhite $controls android) 
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) stat(N r2 
  DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none)
  prehead(`"{"' `"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
  `"\begin{tabular}{l*{6}{c}}"'`"\toprule"' `"&\multicolumn{1}{c}{(1)}&\multicolumn{1}{c}{(2)}
  &\multicolumn{1}{c}{(3)}&\multicolumn{1}{c}{(4)}&\multicolumn{1}{c}{(5)}
  &\multicolumn{1}{c}{(6)}\\"')
  postfoot(`"Polling Area Controls? &No&No&Yes&Yes&Yes&Yes \\"'
  `"State FE?                       &No&No&No&Yes&Yes&Yes \\"'
  `"County FE?                &No&No&No&No&Yes&Yes \\"'
  `"Hour of Day FE?                 &No&No&No&No&No&Yes \\"'
  `"\hline\hline"' `"\bottomrule"' `"\multicolumn{7}{l}{\footnotesize \sym{*} \(p<0.10\),
  \sym{**} \(p<0.05\), \sym{***} \(p<0.01\)} \\"' `"\end{tabular}"' `"}"') ;
#d cr

**********
* Appendix Table A3: Repeat Table 2, Panel B but show full set of coefficients (& add extra columns)
**********
 
* Panel B -- Linear Probability Model
* Uses the data already in memory (the likelyvoter_v1 sample loaded for Table A2).
* Same six specifications as Table A2, with wait_over_30min as the dependent variable:
*   M7 race_black only; M8 adds the other race shares; M9 adds the polling-area controls;
*   M10 absorbs statefips; M11 absorbs statecountyfips; M12 also adds the extra controls.
* DepVarMean is the mean of the dependent variable over each estimation sample.
eststo M7: reg wait_over_30min race_black, cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M8: reg wait_over_30min race_black $other_race, cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M9: reg wait_over_30min race_black $other_race $controls, cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M10: areg wait_over_30min race_black $other_race $controls, abs(statefips) cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M11: areg wait_over_30min race_black $other_race $controls, abs(statecountyfips) ///
  cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M12: areg wait_over_30min race_black $other_race $controls $extracontrols, ///
  abs(statecountyfips) cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)

* Write the six-column LaTeX table with hand-built header and footer rows.
* The output path is quoted so that a result_dir containing spaces does not break the command.
#d ;
esttab M7 M8 M9 M10 M11 M12 using "$result_dir/app_A_t3_lpm_fullcoef.tex", replace 
  keep(race_black race_asianpi race_hispanic race_othernonwhite $controls android) 
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) stat(N r2 
  DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none)
  prehead(`"{"' `"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
  `"\begin{tabular}{l*{6}{c}}"'`"\toprule"' `"&\multicolumn{1}{c}{(1)}&\multicolumn{1}{c}{(2)}
  &\multicolumn{1}{c}{(3)}&\multicolumn{1}{c}{(4)}&\multicolumn{1}{c}{(5)}
  &\multicolumn{1}{c}{(6)}\\"')
  postfoot(`"Polling Area Controls? &No&No&Yes&Yes&Yes&Yes \\"'
  `"State FE?                       &No&No&No&Yes&Yes&Yes \\"'
  `"County FE?                &No&No&No&No&Yes&Yes \\"'
  `"Hour of Day FE?                 &No&No&No&No&No&Yes \\"'
  `"\hline\hline"' `"\bottomrule"' `"\multicolumn{7}{l}{\footnotesize \sym{*} \(p<0.10\),
  \sym{**} \(p<0.05\), \sym{***} \(p<0.01\)} \\"' `"\end{tabular}"' `"}"') ;
#d cr

**********
* Appendix Table A5: Table 2 of Paper with Stricter Likely Voter Filter (likelyvoter_v2)
**********

* Reload the voter-level data, keeping the likelyvoter_v2 sample
use "${data_dir}/voterwaittimes.dta", clear
keep if likelyvoter_v2 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1

* Panel A -- Ordinary Least Squares
* Five specifications of waittime on race_black, clustered on PollingPlace_ID:
*   M1 race_black only; M2 adds the other race shares; M3 adds the polling-area controls;
*   M4 absorbs statefips; M5 absorbs statecountyfips.
* DepVarMean is the mean of the dependent variable over each estimation sample.
eststo M1: reg waittime race_black, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M2: reg waittime race_black $other_race, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M3: reg waittime race_black $other_race $controls, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M4: areg waittime race_black $other_race $controls, abs(statefips) cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M5: areg waittime race_black $other_race $controls, abs(statecountyfips) cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
 
* Panel B -- Linear Probability Model
* The same five specifications with wait_over_30min as the dependent variable (M6-M10)
eststo M6: reg wait_over_30min race_black, cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M7: reg wait_over_30min race_black $other_race, cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M8: reg wait_over_30min race_black $other_race $controls, cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M9: areg wait_over_30min race_black $other_race $controls, abs(statefips) cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M10: areg wait_over_30min race_black $other_race $controls, abs(statecountyfips) ///
  cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)

* Write Panel A with replace, then append Panel B to the same file; the second call closes
* the tabular environment. Both output paths are quoted so that a result_dir containing
* spaces does not break the commands.
#d ;
esttab M1 M2 M3 M4 M5 using "$result_dir/app_A_t5_strictfilter.tex", replace 
  keep(race_black race_asianpi race_hispanic race_othernonwhite) 
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) 
  stat(N r2 DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none)
  prehead(`"{"' `"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
  `"\begin{tabular}{l*{5}{c}}"'`"\toprule"' `"&\multicolumn{1}{c}{(1)}&\multicolumn{1}{c}{(2)}
  &\multicolumn{1}{c}{(3)}&\multicolumn{1}{c}{(4)}&\multicolumn{1}{c}{(5)}\\"'
  `"\multicolumn{5}{l}{\textbf{Panel A: Ordinary Least Squares (Y = Wait Time)}} \\"' `"\hline"')
  postfoot(`"Polling Area Controls? &No&No&Yes&Yes&Yes \\"'
  `"State FE?                       &No&No&No&Yes&Yes \\"'
  `"County FE?                &No&No&No&No&Yes \\"'
  `"\hline"') ;
esttab M6 M7 M8 M9 M10 using "$result_dir/app_A_t5_strictfilter.tex", append
  keep(race_black race_asianpi race_hispanic race_othernonwhite) 
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) 
  stat(N r2 DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none) 
  prehead(`"\multicolumn{5}{l}{\textbf{Panel B: Linear Probability Model (Y = Wait Time $>$ 30min)}} \\"' 
  `"\hline"') postfoot(`"Polling Area Controls? &No&No&Yes&Yes&Yes \\"'
  `"State FE?                       &No&No&No&Yes&Yes \\"'
  `"County FE?                &No&No&No&No&Yes \\"'
  `"\hline\hline"' `"\bottomrule"' 
  `"\multicolumn{6}{l}{\footnotesize \sym{*} \(p<0.10\), 
  \sym{**} \(p<0.05\), \sym{***} \(p<0.01\)} \\"' `"\end{tabular}"' `"}"') ;
#d cr

***************************************************************************************************
*  3. Appendix B (Mechanisms)
***************************************************************************************************

**********
* Appendix Figure B1: Arrival and Wait Times by Race
**********

* Reload the voter-level data with the likelyvoter_v1 sample filters
cd "${master_dir}"
use "${data_dir}/voterwaittimes.dta", clear
keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1
tab poll_open_early poll_close_early, m

* Exclude states that open after 7am or close before 7pm
* These are:
*  poll_open = 7.5 if inlist(state,"Arkansas") == 1
*  poll_open = 8 if state == "Nebraska" & HourOffset == 6
*  poll_open = . if inlist(state,"Idaho","Kansas","Massachusetts","Minnesota","New Hampshire", ///
*   "North Dakota","Tennessee","Vermont","Maine")
* AND
*  poll_close = 18 if inlist(state,"Kentucky","Indiana","Hawaii") == 1
*  poll_close = . if inlist(state,"Georgia","Kansas","New Hampshire","North Dakota") == 1

* include = 1 only when poll_open is at most 7 and poll_close is at least 19, both non-missing
gen include = 1
replace include = 0 if (poll_open > 7 | poll_open == .)
replace include = 0 if (poll_close < 19 | poll_close == .)
egen tag_state = tag(state_name)
tab state_name if tag_state == 1 & include == 1

* Create and limit sample to 1st and 10th deciles of the fraction black variable
* (deciles are computed here, on the sample before the include and decile restrictions)
xtile black_decile = race_black, nquantiles(10)

* Sample restrictions
* The same regression is logged before and after each restriction
reg waittime race_black, cl(PollingPlace_ID)
keep if include == 1
reg waittime race_black, cl(PollingPlace_ID)
keep if (black_decile == 1 | black_decile == 10)
reg waittime race_black, cl(PollingPlace_ID)

* First create average wait time for the 2, and assign to all values (denominator condition in egen)
* Dividing by a 0/1 condition yields missing where the condition is false, so each mean uses
* only that decile's observations.
* NOTE(review): tag_hour and the two avgwait_by_hour_* variables are not used by the figure below.
egen tag_hour = tag(hour_of_arrival)
bysort hour_of_arrival: egen avgwait_by_hour_black10 = mean(waittime / (black_decile == 10))
bysort hour_of_arrival: egen avgwait_by_hour_black1 = mean(waittime / (black_decile == 1))

* Overlaid histograms of arrival hour for decile 1 (filled) and decile 10 (outlined).
* The saving() path is quoted so that a result_dir containing spaces does not break the command.
tw (histogram hour_of_arrival if black_decile == 1 & hour >= 7 & hour <= 19, ///
    width(1) start(7) fcolor(vermillion) fintensity(50) ///
    lcolor(vermillion) legend(order(1 "Black 1st Decile" 2 "Black 10th Decile"))) ///
   (histogram hour_of_arrival if black_decile == 10 & hour >= 7 & hour <= 19, ///
    width(1) start(7) fcolor(none) lcolor(black) ylab(0(.01).14,nogrid format(%16.0gc) ///
    angle(horizontal)) ytitle("Density") xtitle("Hour of Day") ///
    xlab(7(1)19, format(%16.0gc)) $white saving("$result_dir/app_B_f1_bunching.gph", replace))
graph export "$result_dir/app_B_f1_bunching.png", replace

**********
* Appendix Figure B2: Partisanship
**********
* Load the county-level data
cd "${master_dir}"
use "${data_dir}/county_voterwaittimes.dta", clear

* Partisanship and State Racial Gap in Wait Times [Colored by Sec of State Party]
* One observation per state (tag_state); state abbreviations are drawn as the markers,
* red when sec_state_Rep == 1 and blue when sec_state_Rep == 0, with a linear fit.
* All file paths are quoted so that a result_dir containing spaces does not break the commands.
egen tag_state=tag(state_name)
twoway (scatter state_coef_ebayes state_republican_share2016 if sec_state_Rep == 1 & ///
        tag_state==1, mlabcolor(red) mlab(state_abb) mlabpos(0) m(i) mlabsize(vsmall)) ///
       (scatter state_coef_ebayes state_republican_share2016 if sec_state_Rep == 0 & ///
	    tag_state==1, mlabcolor(blue) mlab(state_abb) mlabpos(0) m(i) mlabsize(vsmall)) /// 
       (lfit state_coef_ebayes state_republican_share2016 if tag_state==1, ///
       lcolor(vermillion%70)), ///
       $white ylab(, nogrid labsize(vsmall)) title("(a) State Vote Share") ///
       xtitle("Republican Vote Share", size(small)) ///
       ytitle("Black-White Wait Time Disparity", size(small)) ///
       xlab(0(0.1).8) ylab(-15(5)15, nogrid valuelabel angle(horizontal)) ///
       yline(0, lcolor(gs12) lpat(solid)) legend(off) ///
       saving("$result_dir/app_B_f2a_partisan_state.gph", replace)
graph export "$result_dir/app_B_f2a_partisan_state.png", replace
reg state_coef_ebayes state_republican_share2016 if tag_state==1
corr state_coef_ebayes state_republican_share2016 if tag_state==1

* Partisanship and County Racial Gap in Wait Times [Colored by Sec of State Party]
* All observations in the file, drawn as small red/blue points by sec_state_Rep, with a linear fit
sum county_coef, det
twoway (scatter county_coef_ebayes county_republican_share2016 if sec_state_Rep == 1, ///
        mcolor(red) msize(tiny)) ///
       (scatter county_coef_ebayes county_republican_share2016 if sec_state_Rep == 0, ///
        mcolor(blue) msize(tiny)) ///
       (lfit county_coef_ebayes county_republican_share2016, ///
        lcolor(vermillion%70)), $white ylab(, nogrid labsize(vsmall)) ///
        title("(b) County Vote Share") ///
        xtitle("Republican Vote Share", size(small)) ytitle("Black-White Wait Time Disparity", ///
        size(small)) xlab(0(0.1).9) ylab(-100(10)200, ///
        nogrid valuelabel angle(horizontal)) yline(0, lcolor(gs12) lpat(solid)) legend(off) ///
        saving("$result_dir/app_B_f2b_partisan_county.gph", replace)
graph export "$result_dir/app_B_f2b_partisan_county.png", replace
reg county_coef_ebayes county_republican_share2016
corr county_coef_ebayes county_republican_share2016

* Combine subfigures into one
graph combine "$result_dir/app_B_f2a_partisan_state" "$result_dir/app_B_f2b_partisan_county", ///
  graphregion(fcolor(white) lcolor(white)) imargin(10) iscale(* 1) ///
  cols(2) rows(1) saving("$result_dir/app_B_f2_partisan.gph", replace)
graph use "$result_dir/app_B_f2_partisan.gph"
graph export "$result_dir/app_B_f2_partisan.png", replace

**********
* Appendix Figure B3: Chetty-Hendren County Measures & State-Laws
**********

* County-Level Correlates:
cd "${master_dir}"
use "${data_dir}/county_voterwaittimes.dta", clear
egen tag_state = tag(state_name)

global varlist causal_p25_cty_kr26 causal_p75_cty_kr26 cs_race_theil_2000_st ///
  gini_st inc_share_1perc_st scap_ski90pcm_st
					
* Define matrix to store results 
* One row per predictor (the two state-level laws plus the county-level list);
* column 1 holds the coefficient and column 2 its robust standard error.
* The row count is taken from the predictor list so that every row has a name and a result
* (the matrix was previously allocated with 10 rows for 8 predictors).
local npred : word count earlyvotinglaw strictidlaw ${varlist}
mat define predictors = J(`npred',2,.)
matrix rownames predictors = earlyvotinglaw strictidlaw ${varlist}

* State Level Predictors
* Each is a bivariate regression on one observation per state
reg state_coef_ebayes earlyvotinglaw if tag_state == 1, robust
mat def predictors[1,1] = _b[earlyvotinglaw]
mat def predictors[1,2] = _se[earlyvotinglaw]

reg state_coef_ebayes strictidlaw if tag_state == 1, robust
mat def predictors[2,1] = _b[strictidlaw]
mat def predictors[2,2] = _se[strictidlaw]
	
* County Level Predictors
* One bivariate regression per variable in the list, filling rows 3 onward
local row = 3
foreach var of global varlist {	
	reg county_coef_ebayes `var', robust
	mat def predictors[`row',1] = _b[`var']
	mat def predictors[`row',2] = _se[`var']

	local row = `row'+1
}
		
* Turn the results matrix into a dataset: predictors1 = coefficient, predictors2 = standard error,
* variables = row name
clear
svmat2 predictors, rnames(variables)
gen number = _n
order number variables
* NOTE(review): this tempfile is declared but never written to or read in the visible code
tempfile predictors
		
* Vertical bar positions 1, 2.5, 4, ..., 11.5 (1.5 apart), matching the ylabel() positions below
gen n = 1 + 1.5*(_n - 1)

* 95% confidence bounds from the robust standard errors
gen ci_ub = predictors1 + 1.96*predictors2
gen ci_lb = predictors1 - 1.96*predictors2

* Horizontal bar chart of coefficients with capped confidence intervals.
* The saving() and export paths are quoted so that a result_dir containing spaces works.
twoway 	(bar predictors1 n, barwidth(0.9) color(navy) horizontal xline(0, lcolor(gs3) ///
        lwidth(0.33))) ///
		(rcap ci_lb ci_ub n, lcolor(gs3) lpattern(solid) horizontal), ///
        ylabel(1 `"STATE: Early Voting"' 2.5 `"STATE: Strict ID"' 4 ///
        `""Chetty p25" "Causal Effect""' 5.5 `""Chetty p75" "Causal Effect""'  7 ///
        `" "Theil Index of" "Racial Segregation""' 8.5 `"Gini Coefficient"' 10 ///
        `""Top 1%" "Income Share""' 11.5 `""Social Capital" "Index""', ///
        angle(horizontal) labsize(small)) title(" ") ytitle(" ") ///
		xtitle("Impact of Covariate on Black-White Wait Time Disparity") ///
        legend(off) graphregion(color(white)) $white saving("$result_dir/app_B_f3_predictors.gph", replace)
graph export "$result_dir/app_B_f3_predictors.png", replace

/**********
* Appendix Figure B4: Congestion
**********

cd "${master_dir}"
use "${data_dir}/voterwaittimes.dta", clear
keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1
sort PollingPlace_ID
merge PollingPlace_ID using "${data_dir}/Polling_Places_Voters_AA_Nums.dta"
tab _m
keep if _m == 3

sum NumVoters_per_Poll, detail
local top1 = r(p99)
gen voterXblack = race_black*NumVoters_per_Poll
reg waittime race_black NumVoters_per_Poll voterXblack
gen predicted_black = _b[_cons] + _b[race_black] + (_b[NumVoters_per_Poll]*NumVoters_per_Poll) ///
  + (_b[voterXblack]*NumVoters_per_Poll)
gen predicted_nonblack = _b[_cons] + (_b[NumVoters_per_Poll]*NumVoters_per_Poll)

twoway (kdensity NumVoters_per_Poll if NumVoters_per_Poll <= `top1', bwidth(100) ///
        color("vermillion%100") yaxis(1) yscale(range(0) axis(1)) ytitle("Kernel Density", axis(1)) ///
        ylab(,nogrid format(%16.0gc) angle(horizontal) axis(1))) ///
       (line predicted_black NumVoters_per_Poll if NumVoters_per_Poll <= `top1', color("black") ///
        yaxis(2) yscale(range(0) axis(2)) ytitle("Predicted Wait Time", axis(2)) ///
        ylab(0(5)35, nogrid format(%16.0gc) angle(horizontal) axis(2))) ///
       (line predicted_nonblack NumVoters_per_Poll if NumVoters_per_Poll <= `top1', color("black") ///
        yaxis(2) yscale(range(0) axis(2)) ytitle("Predicted Wait Time", axis(2)) ///
        ylab(0(5)35, nogrid format(%16.0gc) angle(horizontal) axis(2)) ///
        xtitle("Number of Voters Per Polling Place", size(medium)) legend(off) xlab(0(1000)10000, ///
        format(%16.0gc)) $white text(.00019 8000 "Fraction Black = 0", color(vermillion)) ///
        text(.00028 8000 "Fraction Black = 1", color(vermillion)) ///
       saving($result_dir/app_B_f4_congestion.gph,replace))
graph export "$result_dir/app_B_f4_congestion.png",  replace 

egen tag_pollingplace = tag(PollingPlace_ID)
corr  NumVoters_per_Poll race_black if tag_pollingplace == 1
corr  NumVoters_per_Poll NumVotesCast_2016_per_Poll if tag_pollingplace == 1

**********/
* Appendix Table B1: Table 2, Column 4, Limiting Hour Window
**********

* Panel A -- OLS
* Reload the voter-level data with the likelyvoter_v1 sample filters
use "${data_dir}/voterwaittimes.dta", clear
keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1

* Each model absorbs statefips and clusters on PollingPlace_ID; the models differ only in the
* hour restriction. DepVarMean is the dependent-variable mean over each estimation sample.
* M1: no hour restriction
eststo M1: areg waittime race_black $other_race $controls, absorb(statefips) cluster(PollingPlace_ID)
 summarize waittime if e(sample), detail
 estadd scalar DepVarMean = r(mean)
* M2: hour 8 or later
eststo M2: areg waittime race_black $other_race $controls if hour>=8, ///
  absorb(statefips) cluster(PollingPlace_ID)
 summarize waittime if e(sample), detail
 estadd scalar DepVarMean = r(mean)
* M3: hour 9 or later
eststo M3: areg waittime race_black $other_race $controls if hour>=9, ///
  absorb(statefips) cluster(PollingPlace_ID)
 summarize waittime if e(sample), detail
 estadd scalar DepVarMean = r(mean)
* M4: hour 10 or later
eststo M4: areg waittime race_black $other_race $controls if hour>=10, ///
  absorb(statefips) cluster(PollingPlace_ID)
 summarize waittime if e(sample), detail
 estadd scalar DepVarMean = r(mean)
* M5: hours 10 through 15
eststo M5: areg waittime race_black $other_race $controls if hour>=10 & hour<=15, ///
  absorb(statefips) cluster(PollingPlace_ID)
 summarize waittime if e(sample), detail
 estadd scalar DepVarMean = r(mean)
* M6: hour 15 or later
eststo M6: areg waittime race_black $other_race $controls if hour>=15, ///
  absorb(statefips) cluster(PollingPlace_ID)
 summarize waittime if e(sample), detail
 estadd scalar DepVarMean = r(mean)
 
* Panel B -- LPM
* The same hour windows with wait_over_30min as the dependent variable
* M7: no hour restriction
eststo M7: areg wait_over_30min race_black $other_race $controls, ///
  absorb(statefips) cluster(PollingPlace_ID)
 summarize wait_over_30min if e(sample), detail
 estadd scalar DepVarMean = r(mean)
* M8: hour 8 or later
eststo M8: areg wait_over_30min race_black $other_race $controls if hour>=8, ///
  absorb(statefips) cluster(PollingPlace_ID)
 summarize wait_over_30min if e(sample), detail
 estadd scalar DepVarMean = r(mean)
* M9: hour 9 or later
eststo M9: areg wait_over_30min race_black $other_race $controls if hour>=9, ///
  absorb(statefips) cluster(PollingPlace_ID)
 summarize wait_over_30min if e(sample), detail
 estadd scalar DepVarMean = r(mean)
* M10: hour 10 or later
eststo M10: areg wait_over_30min race_black $other_race $controls if hour>=10, ///
  absorb(statefips) cluster(PollingPlace_ID)
 summarize wait_over_30min if e(sample), detail
 estadd scalar DepVarMean = r(mean)
eststo M11: areg wait_over_30min race_black $other_race $controls if hour>=10 & hour<=15, ///
  abs(statefips) cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M12: areg wait_over_30min race_black $other_race $controls if hour>=15, abs(statefips) ///
  cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
 
 
#d ;
esttab M1 M2 M3 M4 M5 M6 using $result_dir/app_B_t1_bunching.tex, replace 
  keep(race_black race_asianpi race_hispanic race_othernonwhite) 
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) 
  stat(N r2 DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none)
  prehead(`"{"' `"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
  `"\begin{tabular}{l*{6}{c}}"'`"\toprule"' `"&\multicolumn{1}{c}{(1)}&\multicolumn{1}{c}{(2)}
  &\multicolumn{1}{c}{(3)}&\multicolumn{1}{c}{(4)}&\multicolumn{1}{c}{(5)}&\multicolumn{1}{c}{(6)} \\"'
  `"\multicolumn{7}{l}{\textbf{Panel A: Ordinary Least Squares (Y = Wait Time)}} \\"' `"\hline"')
  postfoot(`"Sample? &Full& $\geq8am$ & $\geq9am$ & $\geq10am$ & 10am-3pm & $\geq3pm$ \\"'
  `"\hline"') ;
esttab M7 M8 M9 M10 M11 M12 using $result_dir/app_B_t1_bunching.tex, append
  keep(race_black race_asianpi race_hispanic race_othernonwhite) 
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) 
  stat(N r2 DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none) 
  prehead(
  `"\multicolumn{7}{l}{\textbf{Panel B: LPM (Y = Wait Time $>$ 30min)}} \\"'
  `"\hline"') 
  postfoot(`"Sample? &Full& $\geq8am$ & $\geq9am$ & $\geq10am$ & 10am-3pm & $\geq3pm$ \\"'
  `"\hline\hline"' `"\bottomrule"' 
  `"\multicolumn{7}{l}{\footnotesize \sym{*} \(p<0.10\), 
  \sym{**} \(p<0.05\), \sym{***} \(p<0.01\)} \\"' `"\end{tabular}"' `"}"') ;
#d cr

**********
* Appendix Table B2: Controlling for Building Category and Size
**********

* Create a 0/1 dummy for each building type category (1 when category equals the named string)
* and label it "Poll: <Category>". Uses the data left in memory by the Table B1 section.
foreach cat in Commercial Medical Private Public Religious School {
    local dummy = lower("`cat'")
    gen `dummy' = category == "`cat'"
    label var `dummy' "Poll: `cat'"
}

* Replace building area to be in terms of 5,000 square meters, define squared term as well
replace building_area_m2=building_area_m2/5000 
gen building_area_m2_sq=(building_area_m2^2)
label var building_area_m2 "Poll: Building Area"

* Panel A -- OLS (stored as M1-M10) and Panel B -- LPM (stored as N1-N10).
* Both panels run the same ten specifications; only the outcome differs.
* After every model the estimation-sample outcome mean (DepVarMean) and the number of
* distinct polling places (PollingPlaces) are attached for the table footer.
foreach panel in M N {
    if "`panel'" == "M" {
        local y waittime
    }
    else {
        local y wait_over_30min
    }

    * (1) race_black only
    eststo `panel'1: reg `y' race_black, cl(PollingPlace_ID)
     sum `y' if e(sample), detail
     estadd scalar DepVarMean=r(mean)
     unique PollingPlace_ID if e(sample)
     estadd scalar PollingPlaces=r(unique)
    * (2) adds building area and its square (full variable name, no reliance on abbreviation)
    eststo `panel'2: reg `y' race_black building_area_m2 building_area_m2_sq, cl(PollingPlace_ID)
     sum `y' if e(sample), detail
     estadd scalar DepVarMean=r(mean)
     unique PollingPlace_ID if e(sample)
     estadd scalar PollingPlaces=r(unique)
    * (3) absorbs building category
    eststo `panel'3: areg `y' race_black, abs(category) cl(PollingPlace_ID)
     sum `y' if e(sample), detail
     estadd scalar DepVarMean=r(mean)
     unique PollingPlace_ID if e(sample)
     estadd scalar PollingPlaces=r(unique)
    * (4) absorbs building subcategory
    eststo `panel'4: areg `y' race_black, abs(subcategory) cl(PollingPlace_ID)
     sum `y' if e(sample), detail
     estadd scalar DepVarMean=r(mean)
     unique PollingPlace_ID if e(sample)
     estadd scalar PollingPlaces=r(unique)
    * (5)-(10) one subsample per building category, in the column order used by the table footer
    local col = 4
    foreach cat in Commercial Medical Private Public Religious School {
        local ++col
        eststo `panel'`col': reg `y' race_black if category == "`cat'", cl(PollingPlace_ID)
         sum `y' if e(sample), detail
         estadd scalar DepVarMean=r(mean)
         unique PollingPlace_ID if e(sample)
         estadd scalar PollingPlaces=r(unique)
    }
}
 
* Panel C -- Building Category Predicts Fraction Black
* The outcome here is race_black, so DepVarMean must summarize race_black (not waittime).
* Commercial is the omitted category in O1.
eststo O1: reg race_black medical private public religious school, cl(PollingPlace_ID)
 sum race_black if e(sample), detail
 estadd scalar DepVarMean=r(mean)
 unique PollingPlace_ID if e(sample)
 estadd scalar PollingPlaces=r(unique) 
eststo O2: reg race_black building_area_m2, cl(PollingPlace_ID)
 sum race_black if e(sample), detail
 estadd scalar DepVarMean=r(mean)
 unique PollingPlace_ID if e(sample)
 estadd scalar PollingPlaces=r(unique) 
 
* Write Panel A, then append Panels B and C to the same LaTeX file
#d ;
esttab M1 M2 M3 M4 M5 M6 M7 M8 M9 M10 using $result_dir/app_B_t2_buildingtype.tex, replace 
  keep(race_black) 
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) 
  stat(N r2 DepVarMean PollingPlaces, fmt(%16.0gc 2 2 %16.0gc) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none)
  prehead(`"{"' `"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
  `"\begin{tabular}{l*{10}{c}}"'`"\toprule"' `"&\multicolumn{1}{c}{(1)}&\multicolumn{1}{c}{(2)}
  &\multicolumn{1}{c}{(3)}&\multicolumn{1}{c}{(4)}&\multicolumn{1}{c}{(5)}
  &\multicolumn{1}{c}{(6)}&\multicolumn{1}{c}{(7)}&\multicolumn{1}{c}{(8)}
  &\multicolumn{1}{c}{(9)}&\multicolumn{1}{c}{(10)}\\"'
  `"\multicolumn{10}{l}{\textbf{Panel A: Ordinary Least Squares (Y = Wait Time)}} \\"' `"\hline"')
  postfoot(`"Category FE? &No&No&Yes&No&No&No&No&No&No&No \\"'
  `"Subcategory FE?       &No&No&No&Yes&No&No&No&No&No&No \\"'
  `"Subsample?            &All&All&All&All&Com&Med&Pri&Pub&Rel&Sch\\"'
  `"\hline"') ;
esttab N1 N2 N3 N4 N5 N6 N7 N8 N9 N10 using $result_dir/app_B_t2_buildingtype.tex, append
  keep(race_black)
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) 
  stat(N r2 DepVarMean PollingPlaces, fmt(%16.0gc 2 2 %16.0gc) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none)
  prehead(
  `"\multicolumn{10}{l}{\textbf{Panel B: Linear Probability Model (Y = Wait Time $>$ 30min)}} \\"'
  `"\hline"') 
  postfoot(`"Category FE? &No&No&Yes&No&No&No&No&No&No&No \\"'
  `"Subcategory FE?       &No&No&No&Yes&No&No&No&No&No&No \\"'
  `"Subsample?            &All&All&All&All&Com&Med&Pri&Pub&Rel&Sch\\"'
  `"\hline"') ;
esttab O1 O2 using $result_dir/app_B_t2_buildingtype.tex, append
  keep(medical private public religious school building_area_m2)
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) 
  stat(N r2 DepVarMean PollingPlaces, fmt(%16.0gc 2 2 %16.0gc) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none)
  prehead(
  `"\multicolumn{10}{l}{\textbf{Panel C: Do Building Characteristics Predict Race? (Y = Fraction Black)}} \\"' 
  `"\hline"') postfoot(`"\hline\hline"' `"\bottomrule"' 
  `"\multicolumn{11}{l}{\footnotesize \sym{*} \(p<0.10\), 
  \sym{**} \(p<0.05\), \sym{***} \(p<0.01\)} \\"' `"\end{tabular}"' `"}"') ;
#d cr

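/**********
* Appendix Table B3: Congestion
* NOTE: the block comment opened on the first line of this header is not closed until the
*       banner line directly above "4. Appendix C", so the code for Tables B3 and B4 below
*       is currently disabled and app_B_t3 / app_B_t4 are not regenerated by this file.
**********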

use "${data_dir}/voterwaittimes.dta", clear
keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1
sort PollingPlace_ID
merge PollingPlace_ID using "${data_dir}/Polling_Places_Voters_AA_Nums.dta"
keep if _m == 3
replace NumVoters_per_Poll = NumVoters_per_Poll/1000
label var NumVoters_per_Poll "Voters Per Polling Place"

* Panel A -- OLS
eststo M1: reg waittime race_black, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M2: reg waittime race_black NumVoters_per_Poll, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M3: reg waittime race_black $other_race, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M4: reg waittime race_black NumVoters_per_Poll $other_race, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M5: reg waittime race_black $other_race $controls, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M6: reg waittime race_black $other_race NumVoters_per_Poll $controls, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M7: areg waittime race_black $other_race $controls, abs(statefips) cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M8: areg waittime race_black $other_race NumVoters_per_Poll $controls, ///
  abs(statefips) cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M9: areg waittime race_black $other_race $controls, abs(statecountyfips) cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M10: areg waittime race_black $other_race NumVoters_per_Poll $controls, ///
  abs(statecountyfips) cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
 
* Panel B -- LPM
eststo M11: reg wait_over_30min race_black, cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M12: reg wait_over_30min race_black NumVoters_per_Poll, cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M13: reg wait_over_30min race_black $other_race, cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M14: reg wait_over_30min race_black $other_race NumVoters_per_Poll, cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M15: reg wait_over_30min race_black $other_race $controls, cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M16: reg wait_over_30min race_black $other_race NumVoters_per_Poll $controls, ///
  cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M17: areg wait_over_30min race_black $other_race $controls, abs(statefips) cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M18: areg wait_over_30min race_black $other_race NumVoters_per_Poll $controls, ///
  abs(statefips) cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M19: areg wait_over_30min race_black $other_race $controls, abs(statecountyfips) ///
  cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M20: areg wait_over_30min race_black $other_race NumVoters_per_Poll $controls, ///
  abs(statecountyfips) cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
 
#d ;
esttab M1 M2 M3 M4 M5 M6 M7 M8 M9 M10 using $result_dir/app_B_t3_congestion_overall.tex, replace 
  keep(race_black race_asianpi race_hispanic race_othernonwhite NumVoters_per_Poll) 
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) 
  stat(N r2 DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none)
  prehead(`"{"' `"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
  `"\begin{tabular}{l*{10}{c}}"'`"\toprule"' `"&\multicolumn{1}{c}{(1)}&\multicolumn{1}{c}{(2)}
  &\multicolumn{1}{c}{(3)}&\multicolumn{1}{c}{(4)}&\multicolumn{1}{c}{(5)}&\multicolumn{1}{c}{(6)}
  &\multicolumn{1}{c}{(7)}&\multicolumn{1}{c}{(8)}&\multicolumn{1}{c}{(9)}&\multicolumn{1}{c}{(10)}\\"'
  `"\multicolumn{10}{l}{\textbf{Panel A: Ordinary Least Squares (Y = Wait Time)}} \\"' `"\hline"')
  postfoot(`"Polling Area Controls?  &No&No&No&No&Yes&Yes&Yes&Yes&Yes&Yes \\"'
  `"State FE?                        &No&No&No&No&No&No&Yes&Yes&Yes&Yes \\"'
  `"County FE?                       &No&No&No&No&No&No&No&No&Yes&Yes \\"'
  `"\hline"') ;
esttab M11 M12 M13 M14 M15 M16 M17 M18 M19 M20 using $result_dir/app_B_t3_congestion_overall.tex, 
  append keep(race_black race_asianpi race_hispanic race_othernonwhite NumVoters_per_Poll) 
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) 
  stat(N r2 DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none) 
  prehead(
  `"\multicolumn{10}{l}{\textbf{Panel B: Linear Probability Model (Y = Wait Time $>$ 30min)}} \\"'
  `"\hline"') postfoot(`"Polling Area Controls?  &No&No&No&No&Yes&Yes&Yes&Yes&Yes&Yes \\"'
  `"State FE?                        &No&No&No&No&No&No&Yes&Yes&Yes&Yes \\"'
  `"County FE?                       &No&No&No&No&No&No&No&No&Yes&Yes \\"'
  `"\hline\hline"' `"\bottomrule"' 
  `"\multicolumn{6}{l}{\footnotesize \sym{*} \(p<0.10\), 
  \sym{**} \(p<0.05\), \sym{***} \(p<0.01\)} \\"' `"\end{tabular}"' `"}"') ;
#d cr

**********
* Appendix Table B4: Congestion Interactions
**********

gen voterXblack = race_black*NumVoters_per_Poll
label var voterXblack "Interaction: Black X VotersPerPoll"

* Panel A -- OLS
eststo M1: reg waittime race_black NumVoters_per_Poll voterXblack, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M2: reg waittime race_black NumVoters_per_Poll voterXblack $other_race, cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M3: reg waittime race_black NumVoters_per_Poll voterXblack $other_race $controls, ///
  cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M4: areg waittime race_black NumVoters_per_Poll voterXblack $other_race $controls, ///
  abs(statefips) cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M5: areg waittime race_black NumVoters_per_Poll voterXblack $other_race $controls, ///
  abs(statecountyfips) cl(PollingPlace_ID)
 sum waittime if e(sample), detail
 estadd scalar DepVarMean=r(mean)
 
* Panel B -- LPM
eststo M6: reg wait_over_30min race_black NumVoters_per_Poll voterXblack, cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M7: reg wait_over_30min race_black NumVoters_per_Poll voterXblack $other_race, ///
  cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M8: reg wait_over_30min race_black NumVoters_per_Poll voterXblack $other_race ///
  $controls, cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M9: areg wait_over_30min race_black NumVoters_per_Poll voterXblack $other_race ///
  $controls, abs(statefips) cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
eststo M10: areg wait_over_30min race_black NumVoters_per_Poll voterXblack $other_race ///
  $controls, abs(statecountyfips) cl(PollingPlace_ID)
 sum wait_over_30min if e(sample), detail
 estadd scalar DepVarMean=r(mean)
 
#d ;
esttab M1 M2 M3 M4 M5 using $result_dir/app_B_t4_congestion_interact.tex, replace 
  keep(race_black race_asianpi race_hispanic race_othernonwhite NumVoters_per_Poll voterXblack) 
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) 
  stat(N r2 DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none)
  prehead(`"{"' `"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
  `"\begin{tabular}{l*{5}{c}}"'`"\toprule"' `"&\multicolumn{1}{c}{(1)}&\multicolumn{1}{c}{(2)}
  &\multicolumn{1}{c}{(3)}&\multicolumn{1}{c}{(4)}&\multicolumn{1}{c}{(5)}\\"'
  `"\multicolumn{5}{l}{\textbf{Panel A: Ordinary Least Squares (Y = Wait Time)}} \\"' `"\hline"')
  postfoot(`"Polling Area Controls? &No&No&Yes&Yes&Yes \\"'
  `"State FE?                       &No&No&No&Yes&Yes \\"'
  `"County FE?                       &No&No&No&No&Yes \\"'
  `"\hline"') ;
esttab M6 M7 M8 M9 M10 using $result_dir/app_B_t4_congestion_interact.tex, append
  keep(race_black race_asianpi race_hispanic race_othernonwhite NumVoters_per_Poll voterXblack) 
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) 
  stat(N r2 DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none) 
  prehead(
  `"\multicolumn{5}{l}{\textbf{Panel B: Linear Probability Model (Y = Wait Time $>$ 30min)}} \\"'
  `"\hline"') postfoot(`"Polling Area Controls? &No&No&Yes&Yes&Yes \\"'
  `"State FE?                       &No&No&No&Yes&Yes \\"'
  `"County FE?                       &No&No&No&No&Yes \\"'
  `"\hline\hline"' `"\bottomrule"' 
  `"\multicolumn{6}{l}{\footnotesize \sym{*} \(p<0.10\), 
  \sym{**} \(p<0.05\), \sym{***} \(p<0.01\)} \\"' `"\end{tabular}"' `"}"') ;
#d cr

***************************************************************************************************/
*  4. Appendix C (Descriptive Stats by State, CD, and County)
***************************************************************************************************

**********
* Appendix Table C1: State Level Measures of Wait Time and Disparities
**********

* Build a matrix with one row per state and seven columns (N, mean, SD, empirical-Bayes mean,
* disparity coefficient, its SE, empirical-Bayes disparity), then export it with outtable.
* Each cell is the mean of the state-level variable over that state's rows
* (presumably constant within state -- TODO confirm).
use "${data_dir}/county_voterwaittimes.dta", clear
* Space-free state names are used as the matrix row names
gen state_ns = subinstr(state_name," ","",.) 
levelsof state_ns, local(state_nospace)
* Size the matrix from the number of states actually present instead of hard-coding 46 rows,
* so the row-name list and the fill loop below always conform to the matrix
local n_states : word count `state_nospace'
matrix define SSum=J(`n_states',7,.)
matrix rownames SSum = `state_nospace'
local j = 1
foreach i in `state_nospace' {
    * N Wait Time
    qui sum state_waittime_N if state_ns == "`i'"
    matrix SSum[`j',1] = r(mean)
    * Mean Wait Time
    qui sum state_waittime_avg if state_ns == "`i'"
    matrix SSum[`j',2] = r(mean)
    * SD Wait Time
    qui sum state_waittime_sd if state_ns == "`i'"
    matrix SSum[`j',3] = r(mean)
    * Empirical Bayes Adjusted Mean Wait Time
    qui sum state_waittime_avg_ebayes if state_ns == "`i'"
    matrix SSum[`j',4] = r(mean)
    * Disparity within State
    sum state_coef if state_ns == "`i'"
    matrix SSum[`j',5] = r(mean)
    * SE of Disparity within State
    sum state_se if state_ns == "`i'"
    matrix SSum[`j',6] = r(mean)
    * Empirical Bayes Adjusted Disparity within State
    sum state_coef_ebayes if state_ns == "`i'"
    matrix SSum[`j',7] = r(mean)
    local j=`j'+1
}
outtable using $result_dir/app_C_t1_state_raw, mat(SSum) replace nodots ///
  f(%9.0fc %9.2fc %9.2fc %9.2fc %9.2fc %9.2fc %9.2fc)

**********
* Appendix Table C2: Congressional District Level Measures of Wait Time and Disparities
**********

* Build a matrix with one row per congressional district and seven columns (N, mean, SD,
* empirical-Bayes mean, disparity coefficient, its SE, empirical-Bayes disparity), then export it.
* Each cell is the mean of the district-level variable over the rows of that district.
use "${data_dir}/cces_comparison.dta", clear
gen state_ns = subinstr(state_name," ","",.) 
* Row identifier: space-free state name, underscore, cd115
gen state_cd = state_ns+"_"+cd115
levelsof state_cd, local(cd_names)
* Size the matrix from the number of districts actually present instead of hard-coding 410 rows,
* so the row-name list and the fill loop below always conform to the matrix
local n_cds : word count `cd_names'
matrix define CDSum=J(`n_cds',7,.)
matrix rownames CDSum = `cd_names'
local j = 1
foreach i in `cd_names' {
    * N Wait Time
    qui sum cd_waittime_N if state_cd == "`i'"
    matrix CDSum[`j',1] = r(mean)
    * Mean Wait Time
    qui sum cd_waittime_avg if state_cd == "`i'"
    matrix CDSum[`j',2] = r(mean)
    * SD Wait Time
    qui sum cd_waittime_sd if state_cd == "`i'"
    matrix CDSum[`j',3] = r(mean)
    * Empirical Bayes Adjusted Mean Wait Time
    qui sum cd_waittime_avg_ebayes if state_cd == "`i'"
    matrix CDSum[`j',4] = r(mean)
    * Disparity within CD
    sum cd_coef if state_cd == "`i'"
    matrix CDSum[`j',5] = r(mean)
    * SE of Disparity within CD
    sum cd_se if state_cd == "`i'"
    matrix CDSum[`j',6] = r(mean)
    * Empirical Bayes Adjusted Disparity within CD
    sum cd_coef_ebayes if state_cd == "`i'"
    matrix CDSum[`j',7] = r(mean)
    local j=`j'+1
}
outtable using $result_dir/app_C_t2_cd_raw, mat(CDSum) replace nodots ///
  f(%9.0fc %9.2fc %9.2fc %9.2fc %9.2fc %9.2fc %9.2fc)
  
**********
* Appendix Table C3: (Largest 100) County-Level Measures of Wait Time and Disparities
**********

* Build a matrix for the 100 rows with the largest county_pop: population, N, mean, SD,
* empirical-Bayes mean, disparity coefficient, its SE, empirical-Bayes disparity; then export it.
use "${data_dir}/county_voterwaittimes.dta", clear
* Sort descending by population and keep the first 100 rows
* (assumes one row per county -- TODO confirm)
gsort - county_pop
keep if _n<=100
* Rescale population by 1,000 (presumably stored in thousands -- TODO confirm)
replace county_pop = county_pop*1000
tab county_name
* Recode three county names before they are turned into matrix row names
replace county_name = "DC" if county_name == "District Of Columbia"
replace county_name = "StLouis" if county_name == "St. Louis"
replace county_name = "DuPage" if county_name == "Dupage"

* Row identifier: space-free county name, underscore, space-free state name
gen state_ns = subinstr(state_name," ","",.) 
gen county_ns = subinstr(county_name," ","",.) 
gen county_state = county_ns+"_"+state_ns
levelsof county_state, local(c_names)
* Size the matrix from the number of distinct county_state values instead of hard-coding 100
* rows, so the row-name list and the fill loop below always conform to the matrix
local n_counties : word count `c_names'
matrix define CSum=J(`n_counties',8,.)
matrix rownames CSum = `c_names'
local j = 1
foreach i in `c_names' {
    * County Population
    qui sum county_pop if county_state == "`i'"
    matrix CSum[`j',1] = r(mean)    
    * N Wait Time
    qui sum county_waittime_N if county_state == "`i'"
    matrix CSum[`j',2] = r(mean)
    * Mean Wait Time
    qui sum county_waittime_avg if county_state == "`i'"
    matrix CSum[`j',3] = r(mean)
    * SD Wait Time
    qui sum county_waittime_sd if county_state == "`i'"
    matrix CSum[`j',4] = r(mean)
    * Empirical Bayes Adjusted Mean Wait Time
    qui sum county_waittime_avg_ebayes if county_state == "`i'"
    matrix CSum[`j',5] = r(mean)
    * Disparity within County
    sum county_coef if county_state == "`i'"
    matrix CSum[`j',6] = r(mean)
    * SE of Disparity within County
    sum county_se if county_state == "`i'"
    matrix CSum[`j',7] = r(mean)
    * Empirical Bayes Adjusted Disparity within County
    sum county_coef_ebayes if county_state == "`i'"
    matrix CSum[`j',8] = r(mean)
    local j=`j'+1
}
outtable using $result_dir/app_C_t3_county_raw, mat(CSum) replace nodots ///
  f(%9.0fc %9.0fc %9.2fc %9.2fc %9.2fc %9.2fc %9.2fc %9.2fc)

* Close the log opened with log using at the top of this file
log close

* NOTE(review): stop is not a built-in Stata command, so unless a program named stop is defined
*   elsewhere this line ends the do-file with an unrecognized-command error. Presumably a
*   deliberate halt marker -- confirm, since the error would also abort any calling do-file.
stop


